# install.packages(c("snow", "numDeriv"))
library(snow) # For running on MPI server (or locally) in parallel 
library(numDeriv) # If running on MPI server, install on server also

# Location of Functions.R
workingDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"
# Location to output csvs from simulations
outputDirectory <- "~/Dropbox (Personal)/Academic/The Statistical Analysis of Misreporting/ReplicationData"

setwd(workingDirectory)
source("Functions.R")

# NOTE: Set the wallclock on compute server to 24 hours for each set of
# 1000 simulations
# Takes roughly 21 hours to run 1000 simulations on 32 cores on Colosse

########## SIMULATION STUDY 2

param.control <- c("(Intercept)" = -0.25,
                   "continuous" = 0.25,
                   "binary" = -0.5,
                   "U" = 0,
                   "Z" = 0.5)

param.sensitive <- c("(Intercept)" = 0,
                     "continuous" = 0.25,
                     "binary" = -0.5)

param.misreport <- c("(Intercept)" = 0.5,
                        "continuous" = -0.5,
                        "binary" = 0.25)

study2 <- function(n) {
  library(numDeriv)

  J <- 4 # Number of control items
  sensitive.response <- 1 # Whether sensitive item is 0 or 1

  intercept <- rep(1, n)
  continuous <- rnorm(n, 1, 1)
  binary <- sample(0:1, n, replace = TRUE, prob = c(0.5, 0.5))

  x <- cbind(intercept, continuous, binary)
  treat <- sample(rep(c(0, 1), each = n / 2)) # List experiment with 0.5 assigned to control & 0.5 to treatment

  # Simulate for whom Z = 1
  prob.sensitive <- plogis(x %*% param.sensitive)
  true.belief <- rbinom(n, 1, prob = prob.sensitive)

  # Simulate who misreports among those who hold the sensitive belief
  # Multiply by true.belief: by assumption, only those holding sensitive belief will lie
  prob.misreport <- plogis(x %*% param.misreport) * as.numeric(true.belief == sensitive.response)
  misreport <- rbinom(n, 1, prob.misreport)

  respondentType <- as.character(NA)
  respondentType[misreport == 1 & true.belief == sensitive.response] <- "Misreport sensitive"
  respondentType[misreport == 0 & true.belief == sensitive.response] <- "Truthful sensitive"
  respondentType[misreport == 0 & true.belief != sensitive.response] <- "Truthful non-sensitive"

  # Answer to the direct question
  direct <- as.numeric(NA)
  direct[respondentType %in% c("Truthful non-sensitive", "Misreport sensitive")] <- abs(sensitive.response - 1)
  direct[respondentType == "Truthful sensitive"] <- sensitive.response

  # Simulate responses to the control list, conditional on the sensitive belief
  prob.control <- plogis(cbind(x,
                               as.numeric(respondentType == "Misreport sensitive"),
                               as.numeric(respondentType %in% c("Misreport sensitive", "Truthful sensitive")))
                         %*% param.control)

  control.items <- rbinom(n, J, prob.control)

  # Observed response to the list experiment depending on treatment status
  y <- control.items + true.belief * treat

  SimData <- data.frame(y, as.data.frame(x), direct, treat)

  model.standard <- listExperiment(formula = y ~ 1 + continuous + binary,
                                   data = SimData, treatment = "treat", J = 4,
                                   control.constrained = "none",
                                   n.runs = 1, verbose = FALSE)

  model.proposed <- listExperiment(formula = y ~ 1 + continuous + binary,
                                   data = SimData, treatment = "treat", J = 4,
                                   control.constrained = "partial",
                                   direct = "direct", sensitive.response = sensitive.response,
                                   include.misreport.treatment = FALSE,
                                   n.runs = 1, verbose = FALSE)

  return(list(n = n,
              model.standard = model.standard,
              model.proposed = model.proposed))

}

n_cores <- as.numeric(Sys.getenv("MOAB_PROCCOUNT"))
CL <- makeCluster(n_cores, type = "MPI")
# If replicating on personal computer instead of MPI cluster:
# n_cores <- 4 # Number of threads/cores on which to run simulations
# CL <- makeCluster(n_cores, type = "SOCK")


# Note that this file was run multiple times on an MPI cluster to generate
# more than the 1000 simulations listed here. Output includes time-stamped
# file names to permit multiple runs and csvs which are then combined
# in the file SimStudyGraphs.R
n_sims <- 1000

# Export necessary functions to cluster
clusterExport(CL, list("logAdd", "mstepMisreport", "mstepSensitive",
                       "mstepControl", "mstepOutcome",
                       "estep", "listExperiment",
                       "param.control", "param.sensitive", "param.misreport"),
              envir = environment())

# Run simluations
sim.out <- parLapply(CL, rep(c(2000, 3000, 5000), n_sims), study2)

sampleSize <- sapply(sim.out, function(X) X[[1]], simplify = TRUE)
sim.standard <- sapply(sim.out, function(X) X[[2]], simplify = FALSE)
sim.proposed <- sapply(sim.out, function(X) X[[3]], simplify = FALSE)

# Pull required parameters from list of model objects
study2Pars <- function(X) {
  par.control.intercept <- sapply(X, function(X) X$par.control["(Intercept)"])
  se.control.intercept <-  sapply(X, function(X) X$se.control["(Intercept)"])
  par.control.continuous <- sapply(X, function(X) X$par.control["continuous"])
  se.control.continuous <-  sapply(X, function(X) X$se.control["continuous"])
  par.control.binary <- sapply(X, function(X) X$par.control["binary"])
  se.control.binary <-  sapply(X, function(X) X$se.control["binary"])
  par.control.z <- sapply(X, function(X) X$par.control["Z == 1"])
  se.control.z <-  sapply(X, function(X) X$se.control["Z == 1"])

  par.sensitive.intercept <- sapply(X, function(X) X$par.sensitive["(Intercept)"])
  se.sensitive.intercept <-  sapply(X, function(X) X$se.sensitive["(Intercept)"])
  par.sensitive.continuous <- sapply(X, function(X) X$par.sensitive["continuous"])
  se.sensitive.continuous <-  sapply(X, function(X) X$se.sensitive["continuous"])
  par.sensitive.binary <- sapply(X, function(X) X$par.sensitive["binary"])
  se.sensitive.binary <-  sapply(X, function(X) X$se.sensitive["binary"])

  if(!is.null(sapply(X, function(X) X$par.misreport)[[1]])) {
    par.misreport.intercept <- sapply(X, function(X) X$par.misreport["(Intercept)"])
    se.misreport.intercept <-  sapply(X, function(X) X$se.misreport["(Intercept)"])
    par.misreport.continuous <- sapply(X, function(X) X$par.misreport["continuous"])
    se.misreport.continuous <-  sapply(X, function(X) X$se.misreport["continuous"])
    par.misreport.binary <- sapply(X, function(X) X$par.misreport["binary"])
    se.misreport.binary <-  sapply(X, function(X) X$se.misreport["binary"])
  } else {
    par.misreport.intercept <- se.misreport.intercept <- NA
    par.misreport.continuous <- se.misreport.continuous <- NA
    par.misreport.binary <- se.misreport.binary <- NA
  }

  Out <- data.frame(par.control.intercept, se.control.intercept,
                    par.control.continuous, se.control.continuous,
                    par.control.binary, se.control.binary,
                    par.control.z, se.control.z,
                    par.sensitive.intercept, se.sensitive.intercept,
                    par.sensitive.continuous, se.sensitive.continuous,
                    par.sensitive.binary, se.sensitive.binary,
                    par.misreport.intercept, se.misreport.intercept,
                    par.misreport.continuous, se.misreport.continuous,
                    par.misreport.binary, se.misreport.binary)

  return(Out)
}

standard <- cbind(study2Pars(sim.standard),
                  model = "Standard estimator", n = sampleSize)
proposed <- cbind(study2Pars(sim.proposed),
                  model = "Proposed estimator", n = sampleSize)

options(digits.secs = 6) # Increase Sys.time() accuracy to 6 digits to avoid filename overlap
write.csv(rbind(standard, proposed),
          paste0(outputDirectory, "/Sim2-", gsub(":|\\.", "-", Sys.time()), ".csv"), row.names = FALSE)

stopCluster(CL)
